library(tidyverse)
library(lfe)
library(broom)
library(kableExtra)

rm(list= ls())

## Load Data 

dat <- readRDS('data/wages_clean.rds')  %>%
  filter(substr(county_id, 1, 2) %in% c('05', '09')) ## subset to Bavaria/NRW

## Define outcome variables 

outcomevars <- names(dat)[str_detect(names(dat), 'wage')]

## Data

ba <- dat %>% 
  dplyr::select(one_of(outcomevars),
                year, county_id, AA.district.id,
                post, treated_post, treated) %>% 
  mutate(period = as.numeric(factor(year)))

## Nr of pre and post treatment periods

id_treat <- ba %>% 
  filter(post == 1) %>% 
  filter(period == min(period)) %>% 
  pull(period) %>% unique()

## Create Period*Treated Indicator Variables 

ba <- fastDummies::dummy_cols(ba, select_columns = 'period') %>% 
  mutate_at(vars(contains('period_')), ~ .*treated) 

## Generate Model Formulas 

periodvars <- names(ba)[str_detect(names(ba), 'period_')]

## Exclude last pre-treatment period 

periodvars <- periodvars[-which(periodvars == paste0('period_', id_treat - 1))]

results <- lapply(outcomevars, function(dv){
  
  model_formula <- as.formula(paste0(dv, ' ~', 
                                     paste0(periodvars, collapse = '+'), 
                                     '| period + county_id | 0 | AA.district.id'))
  
  model_est <- felm(model_formula, data = ba)
  n <- model_est$residuals %>% na.omit() %>% length()
  
  model_est <- model_est %>%
    broom::tidy(conf.int = T) %>% 
    filter(str_detect(term, 'period')) %>% 
    mutate(outcome = paste(dv)) %>% 
    mutate(n = n)
  
}) %>% reduce(rbind) 

## Rename the period variables

results <- results %>% 
  mutate(period = parse_number(term) - (id_treat - 1))

## Remove some outcomes

results <- results %>% 
  filter(!outcome %in% c('wage_all_wohnort', 'wage_higheduc'))

## Reorder Factor for Plotting 

results$outcome <- factor(results$outcome, levels = c('wage_all_arbeitsort',
                                                      'wage_male',
                                                      'wage_female',
                                                      'wage_german',
                                                      'wage_foreign',
                                                      'wage_loweduc',
                                                      'wage_voctrain',
                                                      'wage_15_25',
                                                      'wage_25_55',
                                                      'wage_55_65'))

## Rename Outcomes for Plotting 

results <- results %>%
  mutate(term = recode(term, 
                       'treated_post' = 'Treated * Post'),
         outcome = recode(outcome,
                          'wage_15_25' = '15 - 25 Years Old',
                          'wage_25_55'= '25 - 55 Years Old',
                          'wage_55_65' = '55 -- 65 Years Old',
                          'wage_voctrain' = 'Vocational Degree',
                          'wage_male' = 'Male', 
                          'wage_female' = 'Female',
                          'wage_german' = 'German Natives',
                          'wage_foreign' = 'Other Foreigners',
                          'wage_loweduc' = 'Low Education',
                          'wage_all_arbeitsort' = 'All Employees',
                          'wage_all_wohnort' = 'All Employees',
                          'wage_higheduc' = 'University Degree'),
         outcome = as.factor(outcome))

## Rename Periods for Plotting 

results <- results %>% 
  mutate(period = recode(period,
                         `-2` = '2014',
                         `-1` = '2015',
                         `1` = '2017',
                         `2` = '2018')) 

## Add zero 

n_y <- length(unique(results$outcome))

zero_period <- data.frame(term = rep('2016', n_y),
                          estimate = rep(0, n_y),
                          conf.low = rep(0, n_y),
                          conf.high =rep(0, n_y),
                          outcome = unique(results$outcome),
                          period = rep('2016', n_y))

results <- bind_rows(results, zero_period) %>%
  mutate(period = as.factor(period))


## Figure A6

p1 <- ggplot(results, aes(x = period, y = estimate, 
                          ymin = conf.low, ymax = conf.high)) +
  geom_hline(yintercept = 0, linetype = 'dotted', color = 'grey40') +
  geom_vline(xintercept = 2016, linetype = 'dotted', color = 'grey40') +
  geom_errorbar(width = 0) +
  geom_point(fill = 'white', shape = 21) +
  theme_bw() + 
  ylab('Coefficient Estimate') + 
  xlab('') + 
  facet_wrap(~ outcome, scales = 'free', nrow = 2) + 
  scale_color_grey(name = '') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
  scale_x_discrete(breaks = seq(2014, 2018, 1))

p1


## Figure 7

results_short <- results %>%
  filter(outcome %in% c('All Employees', 'German Natives', 'Low Education'))

myLoc <- 
  (which(levels(factor(results_short$period)) == "2017") +
     which(levels(factor(results_short$period)) == "2016")) / 
  2


p2 <- ggplot(results_short, aes(x = factor(period), y = estimate, 
                          ymin = conf.low, ymax = conf.high)) +
  geom_hline(yintercept = 0, linetype = 'dotted', color = 'grey40') +
  geom_vline(aes(xintercept = myLoc), linetype = 'dotted', color = 'grey40') +
  geom_errorbar(width = 0) +
  geom_point(fill = 'white', shape = 21, size = 2) +
  theme_bw() + 
  ylab('Coefficient Estimate') + 
  xlab('') + 
  facet_wrap(~ outcome, nrow = 1) + 
  scale_color_grey(name = '') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_discrete(drop = FALSE) +
  xlab('')

p2


#### Table A3 details ####

## Kable 

l_out <- results_short %>% 
  dplyr::select(period, outcome, estimate, 
                std.error, p.value, n) %>% 
  mutate(across(3:6, ~round(., 3))) %>% 
  mutate(n = base::format(n,big.mark = ',')) %>% 
  filter(!period == '2016')

## Table

l_out %>%
  kableExtra::kable(., format = 'latex', 
                    booktabs = T,
                    linesep = "",
                    caption = 'Details on the wage results',
                    col.names = c('Year', 'Sample',
                                  'Estimate', 'SE', 'p-value', 'N')) %>%
  kable_styling(latex_options = c("hold_position", 'scale_down'), 
                position = "center") 













